library('tidyverse')
library('tidyquant')
library('tseries')
library('gghighlight')
library('gganimate')

#load('api_key.rdata')

Time series analysis:

What is a stationary time series?

“A time series is stationary if a shift in time doesn’t cause a change in the shape of the distribution. Basic properties of the distribution like the mean, variance, and covariance are constant over time.”

Stationary vs Seasonal Time series - http://www.statisticshowto.com/stationarity/

Stationary vs Seasonal Time series - http://www.statisticshowto.com/stationarity/

Different types of stationarity:

Why do we care about stationarity?

Most statistical models that have been developed assume stationarity.

set.seed(100)

# monthly data, normally distributed
# 100 stoltzcoin average trade price per month
# Standard deviation of 3
dates = seq(from = lubridate::ymd('2011-07-01'), 
            to = lubridate::ymd('2018-06-30'), 
            by = 'months')

n = length(dates)
mu = 100
sd = 3

time_series = tibble(date = dates, stoltzcoin = rnorm(n = n, mean = mu, sd = sd), 
                     type = 'stationary', trend = 'none')

ggplot(time_series, aes(x = date, y = stoltzcoin)) + 
  geom_line() + geom_smooth(method = lm, 
                            formula = y ~ x, se = FALSE) + 
  geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")

Perform regression and look at the residuals

ts_fit = lm(stoltzcoin ~ date, data = time_series)
ts_fit
## 
## Call:
## lm(formula = stoltzcoin ~ date, data = time_series)
## 
## Coefficients:
## (Intercept)         date  
##   1.082e+02   -5.002e-04
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

Always test for stationarity even if it looks stationary

Augmented Dickey-Fuller is a typical test - tseries::adf.test()
  • Tests null hypothesis that the autoregressive model has a unit root
  • Stationarity means rejecting the null hypothesis
  • We will look for p-value < 0.05
# adf.test only accepts univariate time series data, zoo object will be used
adf.test(zoo::zoo(time_series$stoltzcoin, time_series$date))
## Warning in adf.test(zoo::zoo(time_series$stoltzcoin, time_series$date)): p-
## value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(time_series$stoltzcoin, time_series$date)
## Dickey-Fuller = -4.1967, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Add a linear trend for the example

Stoltzcoin has been going up by 2 each month

Known as “trend stationary”

going_up_by = 2
trend = seq(from = 0, 
            to = (n-1) * going_up_by,
            by = going_up_by)

ts_lin_trend = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend, 
                     type = 'non-stationary', trend = 'linear')

ggplot(ts_lin_trend, aes(x = date, y = stoltzcoin)) + 
  geom_line() + geom_smooth(method = lm, 
                            formula = y ~ x, se = FALSE) + 
  geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")

Perform regression and look at the residuals

ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend)
ts_fit
## 
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend)
## 
## Coefficients:
## (Intercept)         date  
##  -887.75051      0.06521
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

adf.test(zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date)):
## p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(ts_lin_trend$stoltzcoin, ts_lin_trend$date)
## Dickey-Fuller = -4.1967, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Add a seasonal component to the linear trend

Stoltzcoin fluctuates throughout the year

going_up_by = 2
trend = seq(from = 0, 
            to = (n-1) * going_up_by,
            by = going_up_by)
seasonal = sin(1:n) * 10

ts_lin_trend_seasonal = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend + seasonal, 
                     type = 'non-stationary', trend = 'linear')

ggplot(ts_lin_trend_seasonal, aes(x = date, y = stoltzcoin)) + 
  geom_line() + geom_smooth(method = lm, 
                            formula = y ~ x, se = FALSE) + 
  geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")

Perform regression and look at the residuals

ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend_seasonal)
ts_fit
## 
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend_seasonal)
## 
## Coefficients:
## (Intercept)         date  
##  -887.83081      0.06523
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

adf.test(zoo::zoo(ts_lin_trend_seasonal$stoltzcoin, ts_lin_trend_seasonal$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend_seasonal$stoltzcoin,
## ts_lin_trend_seasonal$date)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(ts_lin_trend_seasonal$stoltzcoin, ts_lin_trend_seasonal$date)
## Dickey-Fuller = -6.4213, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

Add an exponential trend to the seasonal component

Stoltzcoin has increased in volatility over the years

  • Stoltzcoin now has:
    • A positive and growing linear trend
    • Seasonality
going_up_by = 2
trend = seq(from = 0, 
            to = (n-1) * going_up_by,
            by = going_up_by)
seasonal = sin(1:n) * 1:n

ts_lin_trend_seasonal_exp = tibble(date = dates, stoltzcoin = time_series$stoltzcoin + trend + seasonal, 
                     type = 'non-stationary', trend = 'linear')

ggplot(ts_lin_trend_seasonal_exp, aes(x = date, y = stoltzcoin)) + 
  geom_line() + geom_smooth(method = lm, 
                            formula = y ~ x, se = FALSE) + 
  geom_hline(aes(yintercept = mean(stoltzcoin)), color="red")

Perform regression and look at the residuals

ts_fit = lm(stoltzcoin ~ date, data = ts_lin_trend_seasonal_exp)
ts_fit
## 
## Call:
## lm(formula = stoltzcoin ~ date, data = ts_lin_trend_seasonal_exp)
## 
## Coefficients:
## (Intercept)         date  
##  -925.35762      0.06756
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

adf.test(zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin, ts_lin_trend_seasonal_exp$date))
## Warning in adf.test(zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin,
## ts_lin_trend_seasonal_exp$date)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(ts_lin_trend_seasonal_exp$stoltzcoin, ts_lin_trend_seasonal_exp$date)
## Dickey-Fuller = -6.2925, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary

R’s decompose function does a lot for you

d1y =  as.numeric(lubridate::year(min(ts_lin_trend_seasonal_exp$date)))
d1m = as.numeric(lubridate::month(min(ts_lin_trend_seasonal_exp$date)))
d2y =  as.numeric(lubridate::year(max(ts_lin_trend_seasonal_exp$date)))
d2m = as.numeric(lubridate::month(max(ts_lin_trend_seasonal_exp$date)))

x = ts(ts_lin_trend_seasonal_exp$stoltzcoin, start=c(d1y, d1m), end=c(d2y, d2m), frequency=12)

plot(decompose(x, type = 'additive'))

plot(decompose(x, type = 'multiplicative'))

Example from decompose function

random portion should just look like white noise (otherwise you’ve likely missed something)

require(graphics)

m <- decompose(co2)
plot(m)

dow = tq_index("DOW") %>%
  tq_get(get = "stock.prices")
head(dow, 10)
p = ggplot(dow, aes(x = date, y = close, col = symbol)) + 
  geom_line() + 
  gghighlight(max(close) > 250) + 
  theme_minimal() + 
  facet_wrap(~ symbol)
print(p)

Is BA a stationary time series?

Can you simply zoom in and find a portion that is stationary?

ba = dow %>% 
  filter(symbol == 'BA') %>% 
  select(date, close)
plotly::ggplotly(ggplot(ba, aes(x = date, y = close)) + geom_line() + geom_smooth(method = lm, se = FALSE))
adf.test(zoo::zoo(ba$close, ba$date))
## Warning in adf.test(zoo::zoo(ba$close, ba$date)): p-value greater than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(ba$close, ba$date)
## Dickey-Fuller = 0.29712, Lag order = 13, p-value = 0.99
## alternative hypothesis: stationary
ts_fit = lm(close ~ date, data = ba)
ts_fit
## 
## Call:
## lm(formula = close ~ date, data = ba)
## 
## Coefficients:
## (Intercept)         date  
##  -735.91762      0.05393
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

Are the daily returns stationary?

ret = ba %>%
  tq_transmute(close, mutate_fun = dailyReturn)
ggplot(ret, aes(x = date, y = daily.returns)) + geom_line() + geom_smooth(method = lm, se = FALSE)

adf.test(zoo::zoo(ret$daily.returns, ret$date))
## Warning in adf.test(zoo::zoo(ret$daily.returns, ret$date)): p-value smaller
## than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  zoo::zoo(ret$daily.returns, ret$date)
## Dickey-Fuller = -14.31, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
ts_fit = lm(daily.returns ~ date, data = ret)
ts_fit
## 
## Call:
## lm(formula = daily.returns ~ date, data = ret)
## 
## Coefficients:
## (Intercept)         date  
##  -8.482e-03    5.807e-07
res = tibble(residuals = resid(ts_fit))
ggplot(res, aes(x = 1:nrow(res), y = residuals)) + geom_point() + geom_rug(sides = 'l')

ggplot(res, aes(x = residuals)) + geom_histogram(aes(y = ..density..), bins = 40) + geom_density(col = 'red', size = 1.5)

ts_fit1 = lm(daily.returns ~ date, data = ret)
price = tibble(residuals = resid(ts_fit1))
ts_fit2 = lm(close ~ date, data = ba)
price.diff = tibble(residuals = resid(ts_fit2))

ba1 = bind_cols(daysFromStart = 1:nrow(res), price, price.diff)
ba2 = ba1 %>%
  gather(price_type, price, -daysFromStart)
ba2
ggplot(ba2, aes(daysFromStart, price)) + 
  geom_point() + 
  # Here comes the gganimate code
  transition_states(
    price_type,
    transition_length = 2,
    state_length = 1
  ) +
  view_follow()